This is a Rmd file analyzing our raw count data by the glmmSeq package as described in the vignette and manual.

sessionInfo() #provides list of loaded packages and version of R. I still have version 4.1 for now.
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.31   R6_2.5.1        jsonlite_1.8.4  evaluate_0.20  
##  [5] cachem_1.0.7    rlang_1.1.0     cli_3.6.1       rstudioapi_0.13
##  [9] jquerylib_0.1.4 bslib_0.4.2     rmarkdown_2.21  tools_4.1.2    
## [13] xfun_0.39       yaml_2.3.7      fastmap_1.1.1   compiler_4.1.2 
## [17] htmltools_0.5.5 knitr_1.42      sass_0.4.5
if ("rtracklayer" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("rtracklayer")
if ("goseq" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install('goseq') 
if ("rrvgo" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("rrvgo")
if ("GO.db" %in% rownames(installed.packages()) == 'FALSE') BiocManager::install("GO.db")
#BiocManager::install("org.Ce.eg.db", force=TRUE) #install if needed

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
## 
library(rtracklayer)
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:geneLenDataBase':
## 
##     unfactor
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## Loading required package: GenomeInfoDb
library(rrvgo)
library(GO.db)
## Loading required package: AnnotationDbi
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:S4Vectors':
## 
##     expand
library(forcats)
sessionInfo() #list of packages after library-ing these packages
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] forcats_0.5.1          tidyr_1.3.0            GO.db_3.14.0          
##  [4] AnnotationDbi_1.56.2   Biobase_2.54.0         rrvgo_1.6.0           
##  [7] rtracklayer_1.54.0     GenomicRanges_1.46.1   GenomeInfoDb_1.30.0   
## [10] IRanges_2.28.0         S4Vectors_0.32.3       BiocGenerics_0.40.0   
## [13] goseq_1.46.0           geneLenDataBase_1.30.0 BiasedUrn_2.0.9       
## [16] ggplot2_3.4.2          dplyr_1.1.2           
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.1-0            rjson_0.2.20               
##   [3] ellipsis_0.3.2              XVector_0.34.0             
##   [5] rstudioapi_0.13             ggrepel_0.9.3              
##   [7] bit64_4.0.5                 fansi_1.0.4                
##   [9] xml2_1.3.3                  splines_4.1.2              
##  [11] cachem_1.0.7                GOSemSim_2.20.0            
##  [13] knitr_1.42                  jsonlite_1.8.4             
##  [15] Rsamtools_2.10.0            gridBase_0.4-7             
##  [17] dbplyr_2.1.1                png_0.1-7                  
##  [19] pheatmap_1.0.12             shiny_1.7.1                
##  [21] compiler_4.1.2              httr_1.4.5                 
##  [23] assertthat_0.2.1            Matrix_1.4-0               
##  [25] fastmap_1.1.1               cli_3.6.1                  
##  [27] later_1.3.0                 htmltools_0.5.5            
##  [29] prettyunits_1.1.1           tools_4.1.2                
##  [31] igraph_1.4.2                NLP_0.2-1                  
##  [33] gtable_0.3.3                glue_1.6.2                 
##  [35] GenomeInfoDbData_1.2.7      rappdirs_0.3.3             
##  [37] Rcpp_1.0.10                 slam_0.1-50                
##  [39] jquerylib_0.1.4             vctrs_0.6.2                
##  [41] Biostrings_2.62.0           nlme_3.1-153               
##  [43] xfun_0.39                   stringr_1.5.0              
##  [45] mime_0.12                   lifecycle_1.0.3            
##  [47] restfulr_0.0.15             XML_3.99-0.8               
##  [49] zlibbioc_1.40.0             scales_1.2.1               
##  [51] treemap_2.4-3               hms_1.1.1                  
##  [53] promises_1.2.0.1            MatrixGenerics_1.6.0       
##  [55] parallel_4.1.2              SummarizedExperiment_1.24.0
##  [57] RColorBrewer_1.1-3          yaml_2.3.7                 
##  [59] curl_5.0.0                  memoise_2.0.1              
##  [61] sass_0.4.5                  biomaRt_2.50.3             
##  [63] stringi_1.7.12              RSQLite_2.2.9              
##  [65] BiocIO_1.4.0                GenomicFeatures_1.46.5     
##  [67] filelock_1.0.2              BiocParallel_1.28.3        
##  [69] rlang_1.1.0                 pkgconfig_2.0.3            
##  [71] matrixStats_0.61.0          bitops_1.0-7               
##  [73] evaluate_0.20               lattice_0.20-45            
##  [75] purrr_1.0.1                 GenomicAlignments_1.30.0   
##  [77] bit_4.0.4                   tidyselect_1.2.0           
##  [79] magrittr_2.0.3              R6_2.5.1                   
##  [81] generics_0.1.3              DelayedArray_0.20.0        
##  [83] DBI_1.1.2                   pillar_1.9.0               
##  [85] withr_2.5.0                 mgcv_1.8-38                
##  [87] KEGGREST_1.34.0             RCurl_1.98-1.5             
##  [89] tibble_3.2.1                crayon_1.5.2               
##  [91] wordcloud_2.6               utf8_1.2.3                 
##  [93] BiocFileCache_2.2.1         rmarkdown_2.21             
##  [95] progress_1.2.2              grid_4.1.2                 
##  [97] data.table_1.14.8           blob_1.2.2                 
##  [99] digest_0.6.31               xtable_1.8-4               
## [101] tm_0.7-11                   httpuv_1.6.4               
## [103] munsell_0.5.0               bslib_0.4.2
DEGs <- read.csv(file="../../../output/Slope_Base/signif_genes_normcts.csv", sep=',', header=TRUE)  %>% dplyr::select(!c('X'))

#NOTE! This is not a file only with differentially expressed genes, this contains all of the genes in our dataset but also contains p-value information and fold change information to help determine which genes are signficant DEGs based on our model in glmmSeq

rownames(DEGs) <- DEGs$Gene

dim(DEGs)
## [1] 9011   75
Origin_DEGs <- DEGs %>%  dplyr::filter(Origin < 0.05)

nrow(Origin_DEGs)
## [1] 840
genes <- rownames(DEGs)

Generate files for Functional Enrichment

Based off of Ariana’s script

Functional annotation file obtained from: http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.EggNog_results.txt.gz on April 3, 2023.

Pacuta.annot <- read.delim("../../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt") %>% dplyr::rename("query" = X.query)

dim(Pacuta.annot)
## [1] 16784    21
head(Pacuta.annot)
##                                        query         seed_ortholog    evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a        45351.EDO48725 2.16e-120
## 2  Pocillopora_acuta_HIv2___RNAseq.g18333.t1        45351.EDO38694 3.18e-123
## 3   Pocillopora_acuta_HIv2___RNAseq.g7985.t1 192875.XP_004345759.1 1.70e-183
## 4  Pocillopora_acuta_HIv2___RNAseq.g13511.t1        45351.EDO28722  2.94e-48
## 5      Pocillopora_acuta_HIv2___TS.g15308.t1  10224.XP_006813307.1  3.19e-20
## 6   Pocillopora_acuta_HIv2___RNAseq.g2057.t1 106582.XP_004573970.1  3.70e-14
##   score
## 1 364.0
## 2 355.0
## 3 526.0
## 4 172.0
## 5  92.4
## 6  68.2
##                                                                                                                                                                 eggNOG_OGs
## 1                                                                                       COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2                                                                                       COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
## 3                                                                                                           COG3239@1|root,KOG4232@2759|Eukaryota,39UMW@33154|Opisthokonta
## 4                                                                                           2ED36@1|root,2SITK@2759|Eukaryota,39IIK@33154|Opisthokonta,3C3PY@33208|Metazoa
## 5                                                                                       COG2801@1|root,KOG0017@2759|Eukaryota,38F42@33154|Opisthokonta,3BA5H@33208|Metazoa
## 6 2CSTD@1|root,2S4A7@2759|Eukaryota,3A79X@33154|Opisthokonta,3BT5E@33208|Metazoa,3D9B1@33213|Bilateria,48FVM@7711|Chordata,49CYZ@7742|Vertebrata,4A886@7898|Actinopterygii
##        max_annot_lvl COG_category
## 1      33208|Metazoa            E
## 2      33208|Metazoa            O
## 3 33154|Opisthokonta            I
## 4      33208|Metazoa            -
## 5      33208|Metazoa           OU
## 6      33208|Metazoa            S
##                                                      Description Preferred_name
## 1               Cobalamin-independent synthase, Catalytic domain              -
## 2            negative regulation of male germ cell proliferation          PRDX4
## 3                                          Fatty acid desaturase              -
## 4                                                              -              -
## 5                                                   K02A2.6-like              -
## 6 Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y           PIGY
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         GOs
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         -
## 6                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 GO:0000506,GO:0005575,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005789,GO:0005886,GO:0006464,GO:0006497,GO:0006505,GO:0006506,GO:0006629,GO:0006643,GO:0006644,GO:0006650,GO:0006661,GO:0006664,GO:0006793,GO:0006796,GO:0006807,GO:0008150,GO:0008152,GO:0008610,GO:0008654,GO:0009058,GO:0009059,GO:0009247,GO:0009987,GO:0012505,GO:0016020,GO:0016254,GO:0019538,GO:0019637,GO:0031984,GO:0032991,GO:0034645,GO:0036211,GO:0042157,GO:0042158,GO:0042175,GO:0043170,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043412,GO:0044237,GO:0044238,GO:0044249,GO:0044255,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044425,GO:0044432,GO:0044444,GO:0044446,GO:0044464,GO:0045017,GO:0046467,GO:0046474,GO:0046486,GO:0046488,GO:0071704,GO:0071944,GO:0090407,GO:0098796,GO:0098827,GO:1901135,GO:1901137,GO:1901564,GO:1901566,GO:1901576,GO:1902494,GO:1903509,GO:1990234
##           EC   KEGG_ko
## 1   2.1.1.14 ko:K00549
## 2  1.11.1.15 ko:K03386
## 3 1.14.19.31 ko:K12418
## 4          -         -
## 5          -         -
## 6          - ko:K11001
##                                                                           KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2                                                                     ko04214,map04214
## 3                                                                                    -
## 4                                                                                    -
## 5                                                                                    -
## 6                                                    ko00563,ko01100,map00563,map01100
##   KEGG_Module KEGG_Reaction             KEGG_rclass
## 1      M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2           -             -                       -
## 3           -             -                       -
## 4           -             -                       -
## 5           -             -                       -
## 6           -        R05916                       -
##                             BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000       -    -             -
## 2 ko00000,ko00001,ko01000,ko04147       -    -             -
## 3         ko00000,ko01000,ko01004       -    -             -
## 4                               -       -    -             -
## 5                               -       -    -             -
## 6                 ko00000,ko00001       -    -             -
##                  PFAMs
## 1          Meth_synt_2
## 2  1-cysPrx_C,AhpC-TSA
## 3 Cyt-b5,FA_desaturase
## 4                    -
## 5            RVT_1,rve
## 6                PIG-Y
genes2annot = match(genes, Pacuta.annot$query) #match genes in DEGs (all genes after filtering) to genes in annotation file

sum(is.na(genes2annot)) #number of genes without EggNog annotation
## [1] 2812
missing<-as.data.frame(genes[which(is.na(genes2annot))]) #dataframe of genes without EggNog annotation
head(missing)
##            genes[which(is.na(genes2annot))]
## 1 Pocillopora_acuta_HIv2___RNAseq.g7479.t3a
## 2 Pocillopora_acuta_HIv2___RNAseq.g7479.t3b
## 3   Pocillopora_acuta_HIv2___RNAseq.g682.t1
## 4  Pocillopora_acuta_HIv2___RNAseq.g4805.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g11061.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g16217.t1

2812/9011 genes without eggnog annotation

names(Pacuta.annot)
##  [1] "query"          "seed_ortholog"  "evalue"         "score"         
##  [5] "eggNOG_OGs"     "max_annot_lvl"  "COG_category"   "Description"   
##  [9] "Preferred_name" "GOs"            "EC"             "KEGG_ko"       
## [13] "KEGG_Pathway"   "KEGG_Module"    "KEGG_Reaction"  "KEGG_rclass"   
## [17] "BRITE"          "KEGG_TC"        "CAZy"           "BiGG_Reaction" 
## [21] "PFAMs"
geneInfo0 = data.frame(gene_id = genes, #add gene id
  Accession = Pacuta.annot$seed_ortholog[genes2annot], #add accession number
  Bitscore = Pacuta.annot$score[genes2annot], #add bitscore
  eValue = Pacuta.annot$evalue[genes2annot], #add e value
  Description = Pacuta.annot$Description[genes2annot], #add description of gene
  Annotation.GO.ID = Pacuta.annot$GOs[genes2annot], #add GO ID's
  q_Origin = DEGs$Origin, #add Origin adjusted p-value
  q_Treatment = DEGs$Treatment, #add Treatment adjusted p-value
  q_Interaction = DEGs$Treatment.Origin, #add Treatment:Origin adjusted p-value
  Stable_OriginFC = DEGs$Stable_OriginFC, #add fold change for Slope vs Flat in the stable treatment
  Variable_OriginFC = DEGs$Variable_OriginFC, #add fold change for Slope vs Flat in the variable treatment
  maxGroupFC = DEGs$maxGroupFC, #add max group fold change (was FC bigger in stable of variable treatment)
  col = DEGs$col) #add qualitative significance info

dim(geneInfo0)
## [1] 9011   13
head(geneInfo0,2)
##                                     gene_id      Accession Bitscore    eValue
## 1 Pocillopora_acuta_HIv2___RNAseq.g27841.t1 45351.EDO35402      566 4.00e-197
## 2 Pocillopora_acuta_HIv2___RNAseq.g14011.t1 45351.EDO48592      449 1.17e-151
##                                                 Description
## 1 cyclin-dependent protein serine/threonine kinase activity
## 2          TAF6-like RNA polymerase II, p300 CBP-associated
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Annotation.GO.ID
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                               GO:0000003,GO:0003006,GO:0003674,GO:0003824,GO:0004672,GO:0004674,GO:0004693,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005737,GO:0005813,GO:0005815,GO:0005856,GO:0006464,GO:0006468,GO:0006793,GO:0006796,GO:0006807,GO:0007154,GO:0007165,GO:0007548,GO:0008150,GO:0008152,GO:0009987,GO:0015630,GO:0016301,GO:0016310,GO:0016740,GO:0016772,GO:0016773,GO:0019538,GO:0022414,GO:0023052,GO:0032502,GO:0036211,GO:0043170,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043412,GO:0044237,GO:0044238,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044430,GO:0044446,GO:0044464,GO:0050789,GO:0050794,GO:0050896,GO:0051716,GO:0051726,GO:0065007,GO:0071704,GO:0097472,GO:0140096,GO:1901564
## 2 GO:0000118,GO:0000123,GO:0003674,GO:0003676,GO:0003677,GO:0003712,GO:0003713,GO:0005488,GO:0005575,GO:0005622,GO:0005623,GO:0005634,GO:0005654,GO:0005730,GO:0006325,GO:0006338,GO:0006355,GO:0006357,GO:0006464,GO:0006473,GO:0006475,GO:0006807,GO:0006996,GO:0008150,GO:0008152,GO:0009889,GO:0009891,GO:0009893,GO:0009987,GO:0010468,GO:0010556,GO:0010557,GO:0010604,GO:0016043,GO:0016569,GO:0016570,GO:0016573,GO:0016604,GO:0016607,GO:0018193,GO:0018205,GO:0018393,GO:0018394,GO:0019219,GO:0019222,GO:0019538,GO:0030914,GO:0031248,GO:0031323,GO:0031325,GO:0031326,GO:0031328,GO:0031974,GO:0031981,GO:0032991,GO:0036211,GO:0043170,GO:0043226,GO:0043227,GO:0043228,GO:0043229,GO:0043231,GO:0043232,GO:0043233,GO:0043412,GO:0043543,GO:0043966,GO:0044237,GO:0044238,GO:0044260,GO:0044267,GO:0044422,GO:0044424,GO:0044428,GO:0044446,GO:0044451,GO:0044464,GO:0045935,GO:0048518,GO:0048522,GO:0050789,GO:0050794,GO:0051171,GO:0051173,GO:0051252,GO:0051254,GO:0051276,GO:0060255,GO:0065007,GO:0070013,GO:0070461,GO:0071704,GO:0071840,GO:0080090,GO:0097159,GO:0140110,GO:1901363,GO:1901564,GO:1902493,GO:1902494,GO:1902680,GO:1903506,GO:1903508,GO:1990234,GO:2000112,GO:2001141
##    q_Origin q_Treatment q_Interaction Stable_OriginFC Variable_OriginFC
## 1 0.3325688           1     0.9994279      -0.2177817        -0.1121968
## 2 0.1514413           1     0.9994279       0.6976765         0.2428565
##   maxGroupFC             col
## 1     Stable Not Significant
## 2     Stable Not Significant

Add KEGG annotation information. Downloaded from: http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.KEGG_results.txt.gz on April 3, 2023.

kegg<-read.table("../../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt", sep="", quote="", na.strings=c("","NA"), blank.lines.skip = FALSE, header=FALSE)

dim(kegg)
## [1] 33730     2
head(kegg)
##                                           V1     V2
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a   <NA>
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b K22584
## 3  Pocillopora_acuta_HIv2___RNAseq.g22918.t1   <NA>
## 4  Pocillopora_acuta_HIv2___RNAseq.g18333.t1 K03386
## 5   Pocillopora_acuta_HIv2___RNAseq.g7985.t1   <NA>
## 6  Pocillopora_acuta_HIv2___RNAseq.g13511.t1   <NA>

Add KEGG annotations to each gene.

geneInfo0$KEGG <- kegg$V2[match(geneInfo0$gene_id, kegg$V1)]

Order geneInfo0 by significance of Origin, q_Origin (adjusted p value)

geneInfo <- geneInfo0[order(geneInfo0[, 'q_Origin']), ]

write.csv(geneInfo, file = "../../../output/Slope_Base/geneInfo.csv") #gene info for reference/supplement

Now onto Enrichment!

#geneInfo<-read.csv("../../../output/Slope_Base/geneInfo.csv")

dim(geneInfo)
## [1] 9011   14

Get gene length information.

#import file
gff <- rtracklayer::import("../../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3") #if this doesn't work, restart R and try again 
transcripts <- subset(gff, type == "transcript") #keep only transcripts , not CDS or exons
transcript_lengths <- width(transcripts) #isolate length of each gene
seqnames<-transcripts$ID #extract list of gene id 
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame

geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)] #Add in length to geneInfo

Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in Annotation.GO.ID column

geneInfo$Annotation.GO.ID <- gsub(",", ";", geneInfo$Annotation.GO.ID)
geneInfo$Annotation.GO.ID <- gsub('"', "", geneInfo$Annotation.GO.ID)
geneInfo$Annotation.GO.ID <- gsub("-", NA, geneInfo$Annotation.GO.ID)
### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)

### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)

### Generate vector with names of the 840 significant DEGs by Origin
ID.vector <- geneInfo %>%
  filter(q_Origin < 0.05) %>%
  pull(gene_id)

##Get a list of GO Terms for the 840 significant DEGs by Origin
GO.terms <- geneInfo %>%
  filter(q_Origin < 0.05) %>%
  dplyr::select(gene_id, Annotation.GO.ID)

##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$Annotation.GO.ID), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "Annotation.GO.ID")
GO.terms <- split2

##Construct list of genes with 1 for genes in that are significant for Origin and 0 for those that are not
gene.vector = as.integer(ALL.vector %in% ID.vector) #since we ordered geneInfo by q_Origin, this puts a "1" for the first 840 genes, which are the same genes in ID.vector

names(gene.vector) <- ALL.vector #set names

#weight gene vector by bias for length of gene
pwf <- nullp(gene.vector, ID.vector, bias.data = LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall <- goseq(pwf, ID.vector, gene2cat=GO.terms, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]

colnames(GO)[1] <- "GOterm"
    
#adjust p-values 
GO$bh_adjust <-  p.adjust(GO$over_represented_pvalue, method="BH") #add adjusted p-values

#Write file of results 
write.csv(GO, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin.csv")

#Filtering for p < 0.05
GO_05 <- GO %>%
        dplyr::filter(bh_adjust<0.05) %>%
        dplyr::arrange(., ontology, bh_adjust)

#Filtering for p < 0.05
GO_001 <- GO %>%
        dplyr::filter(bh_adjust<0.001) %>%
        dplyr::arrange(., ontology, bh_adjust)

Plotting!

ontologies<-c("BP", "CC", "MF")

for (category in ontologies) {
    
    go_results <- GO_05
    
    go_results<-go_results%>%
      filter(ontology==category)%>%
      filter(bh_adjust != "NA") %>%
      filter(numInCat>10)%>%
      arrange(., bh_adjust)
    
      #Reduce/collapse GO term set with the rrvgo package 
      simMatrix <- calculateSimMatrix(go_results$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont=category,
                                method="Rel")
    #calculate similarity 
    scores <- setNames(-log(go_results$bh_adjust), go_results$GOterm)
    reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
    
    #keep only the goterms from the reduced list
    go_results<-go_results%>%
      filter(GOterm %in% reducedTerms$go)
    
    #add in parent terms to list of go terms 
    go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
   
    #plot significantly enriched GO terms by Slim Category faceted by slim term 
  GO.plot <-  ggplot2::ggplot(go_results, aes(x = ontology, y = term)) + 
    geom_point(aes(size=bh_adjust)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
  
  print(GO.plot)
  
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_P05", "_", category, ".png"), plot=GO.plot, dpi=100, width=12, height=100, units="in", limitsize=FALSE)
}
## 
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...

## preparing gene to GO mapping data...
## preparing IC data...

Combining MF, CC, and BP into one plot and order by pvalue, based on Jill’s script

GO_plot_all <- GO_001 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, numDEInCat)) %>%
  mutate(term = fct_reorder(term, ontology)) %>%
  ggplot( aes(x=term, y=numDEInCat) ) +
  geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
  geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
  geom_point(size=1, aes(colour = ontology)) +
  coord_flip() +
  ylim(0,305) +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position="bottom"
  ) +
  xlab("") +
  ylab("") +
  theme_bw() + #Set background color 
  theme(panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background #set title attributes

GO_plot_all

ggsave("../../../output/Slope_Base/GOSeq/OriginDEGs_P001.pdf", GO_plot_all, width = 28, height = 175, units = c("in"), dpi=100, limitsize=FALSE)

#ordered by p-value
GO_plot_all_pval <- GO_001 %>% drop_na(ontology) %>% mutate(term = fct_reorder(term, over_represented_pvalue)) %>%
  mutate(term = fct_reorder(term, ontology)) %>%
  ggplot( aes(x=term, y=numDEInCat) ) +
  geom_segment( aes(x=term ,xend=term, y=0, yend=numDEInCat), color="grey") +
  geom_text(aes(label = over_represented_pvalue), hjust = -1, vjust = 0, size = 2) +
  geom_point(size=1, aes(colour = ontology)) +
  coord_flip() +
  ylim(0,305) +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position="bottom"
  ) +
  xlab("") +
  ylab("") +
  theme_bw() + #Set background color 
  theme(panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()) #Set the plot background #set title attributes

GO_plot_all_pval

ggsave("../../../output/Slope_Base/GOSeq/OriginDEGs_P001_orderedpval.pdf", GO_plot_all_pval, width = 28, height = 175, units = c("in"), dpi=100, limitsize=FALSE)

KEGG enrichment

##Get a list of KEGG Terms for the 840 significant DEGs by Origin
KO.terms <- geneInfo %>%
  filter(q_Origin < 0.05) %>%
  dplyr::select(gene_id, KEGG)


#run goseq using Wallenius method for all categories of KEGG terms

KO.wall <-
  goseq(
    pwf,
    ID.vector,
    gene2cat = KO.terms,
    test.cats = c("KEGG"),
    method = "Wallenius",
    use_genes_without_cat = TRUE
  )
## Using manually entered categories.
## Calculating the p-values...
KO <- KO.wall[order(KO.wall$over_represented_pvalue), ]
colnames(KO)[1] <- "KEGGterm"

#adjust p-values
KO$bh_adjust <- p.adjust(KO$over_represented_pvalue, method = "BH") #add adjusted p-values

#Filtering for p < 0.05
KO_05 <- KO %>%
  dplyr::filter(bh_adjust < 0.05) %>%
  dplyr::arrange(., bh_adjust)

head(KO_05)
##   KEGGterm over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 1   K17592            2.847827e-05                        1          4        4
##     bh_adjust
## 1 0.009597177
#Write file of results 
write.csv(KO, "../../../output/Slope_Base/GOSeq/GOseq_KEGG_Origin.csv")

Info about this KEGG term: K17592: SACS; sacsin

What about the direction of FC: Genes significant by Origin and upregulated in Slope origin (positive FC) for both treatments (we should also look into the ones that are positive in one treatment but not the other)

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)

### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)

### Generate vector with names of the 840 significant DEGs by Origin
ID.vector_upSlope <- geneInfo %>%
  filter(q_Origin < 0.05) %>%
  filter(Stable_OriginFC > 0 & Variable_OriginFC > 0) %>%
  pull(gene_id)

##Get a list of GO Terms for the 840 significant DEGs by Origin
GO.terms_upSlope <- geneInfo %>%
  filter(q_Origin < 0.05) %>%
  filter(Stable_OriginFC > 0 & Variable_OriginFC > 0) %>%
  dplyr::select(gene_id, Annotation.GO.ID)

dim(GO.terms_upSlope) #358 genes are upregulated in the Slope Origin in both treatments and significant by Origin
## [1] 346   2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms_upSlope$Annotation.GO.ID), ";")
split2 <- data.frame(v1 = rep.int(GO.terms_upSlope$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "Annotation.GO.ID")
GO.terms_upSlope <- split2

##Construct list of genes with 1 for genes in that are significant for Origin and upregulated in Slope and 0 for those that are not

gene.vector_upSlope = as.integer(ALL.vector %in% ID.vector_upSlope) 

names(gene.vector_upSlope) <- ALL.vector #set names

#weight gene vector by bias for length of gene
pwf_upSlope <- nullp(gene.vector_upSlope, ID.vector_upSlope, bias.data = LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall_upSlope <- goseq(pwf_upSlope, ID.vector_upSlope, gene2cat=GO.terms_upSlope, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO_upSlope <- GO.wall_upSlope[order(GO.wall_upSlope$over_represented_pvalue),]

colnames(GO_upSlope)[1] <- "GOterm"
    
#adjust p-values 
GO_upSlope$bh_adjust <-  p.adjust(GO_upSlope$over_represented_pvalue, method="BH") #add adjusted p-values

#Write file of results 
write.csv(GO_upSlope, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin_upSlope.csv")

#Filtering for p < 0.05
GO_upSlope_05 <- GO_upSlope %>%
        dplyr::filter(bh_adjust<0.05) %>%
        dplyr::arrange(., ontology, bh_adjust)

#Filtering for p < 0.05
GO_upSlope_001 <- GO_upSlope %>%
        dplyr::filter(bh_adjust<0.001) %>%
        dplyr::arrange(., ontology, bh_adjust)

Plotting!

ontologies<-c("BP", "CC", "MF")

for (category in ontologies) {
    
    go_results <- GO_upSlope_05
    
    go_results<-go_results%>%
      filter(ontology==category)%>%
      filter(bh_adjust != "NA") %>%
      filter(numInCat>10)%>%
      arrange(., bh_adjust)
    
      #Reduce/collapse GO term set with the rrvgo package 
      simMatrix <- calculateSimMatrix(go_results$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont=category,
                                method="Rel")
    #calculate similarity 
    scores <- setNames(-log(go_results$bh_adjust), go_results$GOterm)
    reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
    
    #keep only the goterms from the reduced list
    go_results<-go_results%>%
      filter(GOterm %in% reducedTerms$go)
    
    #add in parent terms to list of go terms 
    go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
   
    #plot significantly enriched GO terms by Slim Category faceted by slim term 
  GO.plot_upslope <-  ggplot2::ggplot(go_results, aes(x = ontology, y = term)) + 
    geom_point(aes(size=bh_adjust)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
  
  print(GO.plot_upslope)
  
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_upslope_P05", "_", category, ".png"), plot=GO.plot_upslope, dpi=100, width=12, height=100, units="in", limitsize=FALSE)
}
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...

## preparing gene to GO mapping data...
## preparing IC data...

What about the direction of FC: Genes significant by Origin and upregulated in Flat origin (negative FC) for both treatments (we should also look into the ones that are positive in one treatment but not the other)

### Generate vector with names of all genes 
ALL.vector <- c(geneInfo$gene_id)

### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)

### Generate vector with names of the 840 significant DEGs by Origin
ID.vector_upFlat <- geneInfo %>%
  filter(q_Origin < 0.05) %>%
  filter(Stable_OriginFC < 0 & Variable_OriginFC < 0) %>%
  pull(gene_id)

##Get a list of GO Terms for the 840 significant DEGs by Origin
GO.terms_upFlat <- geneInfo %>%
  filter(q_Origin < 0.05) %>%
  filter(Stable_OriginFC < 0 & Variable_OriginFC < 0) %>%
  dplyr::select(gene_id, Annotation.GO.ID)

dim(GO.terms_upFlat) #480 genes are upregulated in the Flat Origin in both treatments and significant by Origin
## [1] 481   2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms_upFlat$Annotation.GO.ID), ";")
split2 <- data.frame(v1 = rep.int(GO.terms_upFlat$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "Annotation.GO.ID")
GO.terms_upFlat <- split2

##Construct list of genes with 1 for genes in that are significant for Origin and upregulated in Flat and 0 for those that are not

gene.vector_upFlat = as.integer(ALL.vector %in% ID.vector_upFlat) 

names(gene.vector_upFlat) <- ALL.vector #set names

#weight gene vector by bias for length of gene
pwf_upFlat <- nullp(gene.vector_upFlat, ID.vector_upFlat, bias.data = LENGTH.vector) 

#run goseq using Wallenius method for all categories of GO terms 
GO.wall_upFlat <- goseq(pwf_upFlat, ID.vector_upFlat, gene2cat=GO.terms_upFlat, method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO_upFlat <- GO.wall_upFlat[order(GO.wall_upFlat$over_represented_pvalue),]

colnames(GO_upFlat)[1] <- "GOterm"
    
#adjust p-values 
GO_upFlat$bh_adjust <-  p.adjust(GO_upFlat$over_represented_pvalue, method="BH") #add adjusted p-values

#Write file of results 
write.csv(GO_upFlat, "../../../output/Slope_Base/GOSeq/GOseq_DEG_Origin_upFlat.csv")

#Filtering for p < 0.05
GO_upFlat_05 <- GO_upFlat %>%
        dplyr::filter(bh_adjust<0.05) %>%
        dplyr::arrange(., ontology, bh_adjust)

#Filtering for p < 0.05
GO_upFlat_001 <- GO_upFlat %>%
        dplyr::filter(bh_adjust<0.001) %>%
        dplyr::arrange(., ontology, bh_adjust)

Plotting!

ontologies<-c("BP", "CC", "MF")

for (category in ontologies) {
    
    go_results <- GO_upFlat_05
    
    go_results<-go_results%>%
      filter(ontology==category)%>%
      filter(bh_adjust != "NA") %>%
      filter(numInCat>10)%>%
      arrange(., bh_adjust)
    
      #Reduce/collapse GO term set with the rrvgo package 
      simMatrix <- calculateSimMatrix(go_results$GOterm,
                                orgdb="org.Ce.eg.db", #c. elegans database
                                ont=category,
                                method="Rel")
    #calculate similarity 
    scores <- setNames(-log(go_results$bh_adjust), go_results$GOterm)
    reducedTerms <- reduceSimMatrix(simMatrix,
                                scores,
                                threshold=0.7,
                                orgdb="org.Ce.eg.db")
    
    #keep only the goterms from the reduced list
    go_results<-go_results%>%
      filter(GOterm %in% reducedTerms$go)
    
    #add in parent terms to list of go terms 
    go_results$ParentTerm<-reducedTerms$parentTerm[match(go_results$GOterm, reducedTerms$go)]
   
    #plot significantly enriched GO terms by Slim Category faceted by slim term 
  GO.plot_upFlat <-  ggplot2::ggplot(go_results, aes(x = ontology, y = term)) + 
    geom_point(aes(size=bh_adjust)) + 
    scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
    facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
    theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
    strip.text.y = element_text(angle=0, size = 10),
    strip.text.x = element_text(size = 20),
    axis.text = element_text(size = 8),
    axis.title.x = element_blank(),
    axis.title.y = element_blank())
  
  print(GO.plot_upFlat)
  
ggsave(filename=paste0("../../../output/Slope_Base/GOSeq/OriginDEGs_upFlat_P05", "_", category, ".png"), plot=GO.plot_upFlat, dpi=100, width=12, height=100, units="in", limitsize=FALSE)
}
## preparing gene to GO mapping data...
## preparing IC data...
## preparing gene to GO mapping data...
## preparing IC data...

## preparing gene to GO mapping data...
## preparing IC data...

(we should also look into the ones that are positive in one treatment but not the other)